??regression.?lm.## (para que se possa copiar o código e o resultado, que estará na forma de comentário).
Data A; /*IMPORTAÇÃO DE DADOS A PARTIR DE UM ARQUIVO DE TEXTO*/
Infile "C:\Users\bah_m\OneDrive\Documentos2\Doutorado\Disciplinas\Grupo de estudos_R\Ovinos.dat";
Input Animal$ 1-12 Pai$ 14-25 Mae$ 27-38 NascMae 40-43 Sx$ 45 DN 47-53 PN 56-58 .1 GMJD$ 60 DD 62-69 PD 71-72 CND 74 PRD 76
MCD 78 GMJS$ 80 DS 82-89 PS 91-93 CNS 95 PRS 97 MCS 99 TParto 101 anonasc 103-106 idmae 108-109 idd 111-113 ids 115-117
GC 119-120 Ano 122-125 PE 127-128 IPP 130-131 p180 133-134 IDMO 136-137;
Proc print; /*IMPRIMIR DADOS NO OUTPUT*/
Run; /*RODAR O QUE ESTÁ DENTRO DO DATA*/
# Fixed-width files are read with `read.fwf`.
# In `widths`, a positive number is the number of characters of a field and a
# negative number skips that many characters (the gaps between fields).
dataA <- read.fwf(
  "Ovinos.dat",
  widths = c(
    12, -1, 12, -1, 12, -1, 4, -1, 1, -1, # Animal Pai Mae NascMae Sx
    7, -2, 3, -1, 1, -1, 8, -1, 2, -1,    # DN PN GMJD DD PD
    1, -1, 1, -1, 1, -1, 1, -1, 8, -1,    # CND PRD MCD GMJS DS
    3, -1, 1, -1, 1, -1, 1, -1, 1, -1,    # PS CNS PRS MCS TParto
    4, -1, 2, -1, 3, -1, 3, -1, 2, -1,    # anonasc idmae idd ids GC
    4, -1, 2, -1, 2, -1, 2, -1, 2         # Ano PE IPP p180 IDMO
  ),
  col.names = c(
    "Animal", "Pai", "Mae", "NascMae", "Sx", "DN", "PN", "GMJD", "DD", "PD",
    "CND", "PRD", "MCD", "GMJS", "DS", "PS", "CNS", "PRS", "MCS", "TParto",
    "anonasc", "idmae", "idd", "ids", "GC", "Ano", "PE", "IPP", "p180", "IDMO"
  ),
  # Full argument name: the abbreviated `stringsAsFactor` only worked through
  # partial argument matching.
  stringsAsFactors = FALSE
)
# leitura de arquivos com tamanho fixo faz com que espaços em branco façam parte de cada observação
# uma forma interessante de verificar os dados é usando a função `str`
str(dataA)## 'data.frame': 2940 obs. of 30 variables:
## $ Animal : chr "100D1997 " "100D2003 " "100S1996 " "100S1998 " ...
## $ Pai : chr "563S1994 " "514D2001 " "563S1994 " "3721 " ...
## $ Mae : chr "31D1991 " "15D2000 " "85D1994 " "49S1994 " ...
## $ NascMae: chr "1991" "2000" "1994" "1994" ...
## $ Sx : int 2 2 2 2 2 2 2 2 2 2 ...
## $ DN : chr " 13740" " 15935" " 13369" " 14114" ...
## $ PN : chr "4.5" "5.8" "5.0" "9.0" ...
## $ GMJD : int 1 1 1 2 1 1 3 1 3 1 ...
## $ DD : chr " 13840" " 16001" " 13461" " 14204" ...
## $ PD : chr "22" "26" "37" "39" ...
## $ CND : chr "2" "3" "3" "4" ...
## $ PRD : chr "2" "3" "4" "3" ...
## $ MCD : chr "2" "3" "3" "3" ...
## $ GMJS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ DS : chr " 13947" " 16116" " 13573" " 14312" ...
## $ PS : int 32 32 43 65 43 21 41 42 51 32 ...
## $ CNS : chr "2" "1" "4" "4" ...
## $ PRS : chr "2" "1" "3" "4" ...
## $ MCS : chr "2" "1" "3" "4" ...
## $ TParto : chr "2" "2" "1" "1" ...
## $ anonasc: chr "1997" "2003" "1996" "1998" ...
## $ idmae : chr " 6" " 3" " 2" " 4" ...
## $ idd : chr "100" " 66" " 92" " 90" ...
## $ ids : chr "207" "181" "204" "198" ...
## $ GC : chr " ." " ." " ." " ." ...
## $ Ano : chr " ." " ." " ." "2000" ...
## $ PE : chr " ." " ." " ." " ." ...
## $ IPP : chr " ." " ." " ." "24" ...
## $ p180 : chr " ." " ." " ." " ." ...
## $ IDMO : chr " ." " ." " ." " 6" ...
# Strip leading and trailing blanks from every column.
# `trimws` returns character, so every column ends up as a character vector.
dataA[] <- lapply(dataA, trimws)
# função head mostrará as primeiras n observações (default = 6)
head(dataA, n = 10)## Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND PRD MCD
## 1 100D1997 563S1994 31D1991 1991 2 13740 4.5 1 13840 22 2 2 2
## 2 100D2003 514D2001 15D2000 2000 2 15935 5.8 1 16001 26 3 3 3
## 3 100S1996 563S1994 85D1994 1994 2 13369 5.0 1 13461 37 3 4 3
## 4 100S1998 3721 49S1994 1994 2 14114 9.0 2 14204 39 4 3 3
## 5 100S2004 561S2003 48D2000 2000 2 16298 8.0 1 16409 37 3 3 3
## 6 100S2005 539D2004 1D2003 2003 2 16667 4.5 1 16804 16 1 1 1
## 7 100S2006 507S2003 59D2003 2003 2 17049 5.0 3 17127 30 3 3 3
## 8 100T2000 522D1998 30D1994 1994 2 14842 4.0 1 14939 37 2 3 3
## 9 100T2001 605D2000 70T1997 1997 2 15215 4.5 3 15306 35 3 4 3
## 10 101D1996 3721 2D1994 1994 2 13369 5.0 1 13461 18 1 1 1
## GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC Ano PE IPP p180
## 1 1 13947 32 2 2 2 2 1997 6 100 207 . . . . .
## 2 1 16116 32 1 1 1 2 2003 3 66 181 . . . . .
## 3 1 13573 43 4 3 3 1 1996 2 92 204 . . . . .
## 4 1 14312 65 4 4 4 1 1998 4 90 198 . 2000 . 24 .
## 5 1 16485 43 4 3 4 1 2004 4 111 187 . . . . .
## 6 1 16853 21 1 1 1 1 2005 2 137 186 . . . . .
## 7 1 17197 41 3 3 3 1 2006 3 78 148 . . . . .
## 8 1 15001 42 3 3 3 3 2000 6 97 159 . 2001 . 12 .
## 9 1 15383 51 3 4 3 3 2001 4 91 168 . 2003 . 23 .
## 10 1 13573 32 2 2 2 2 1996 2 92 204 . 1998 . 24 .
## IDMO
## 1 .
## 2 .
## 3 .
## 4 6
## 5 .
## 6 .
## 7 .
## 8 7
## 9 6
## 10 4
# Outra forma de visualizar é escolhendo as linhas
dataA[70:80, ]## Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND PRD MCD
## 70 108D2000 577S1998 43D1992 1992 2 14842 3.5 1 14939 30 3 2 3
## 71 108D2002 627D2001 43T2000 2000 2 15586 3.0 4 15671 32 2 2 3
## 72 108D2003 540D2000 76T1997 1997 2 15936 5.0 1 16001 23 3 3 3
## 73 108D2005 GMF 074 35D2004 2004 2 16668 4.5 1 16804 30 3 3 2
## 74 108S2006 507S2003 103S2001 2001 2 17053 5.0 1 17127 24 3 1 3
## 75 109D1997 554S1993 98D1994 1994 2 13744 5.5 1 13840 27 3 3 3
## 76 109D1998 572D1996 3D1996 1996 2 14120 4.0 2 14204 22 2 2 2
## 77 109D1999 616D1997 71D1997 1997 2 14479 5.0 1 14547 19 2 2 2
## 78 109D2001 650D2000 79D1996 1996 2 15216 4.2 1 15306 25 2 2 2
## 79 109S2003 504S2002 118S2000 2000 2 15936 9.0 1 16001 25 2 2 2
## 80 109S2004 561S2003 12D2001 2001 2 16300 8.0 1 16409 49 4 4 4
## GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC Ano PE IPP p180
## 70 1 15001 35 3 3 3 2 2000 8 97 159 . . . . .
## 71 1 15749 50 4 4 3 2 2002 2 85 163 . 2003 . 12 .
## 72 1 16116 40 2 2 2 2 2003 6 65 180 . 2005 . 24 .
## 73 1 16853 38 3 3 2 2 2005 1 136 185 . 2007 . 23 .
## 74 1 17197 29 3 3 3 1 2006 5 74 144 . . . . .
## 75 1 13947 51 4 4 3 2 1997 3 96 203 . 1999 . 25 .
## 76 1 14312 43 3 4 3 2 1998 2 84 192 . 2000 . 23 .
## 77 1 14660 50 3 3 3 2 1999 2 68 181 . 2001 . 23 .
## 78 1 15383 35 2 2 2 2 2001 5 90 167 . . . . .
## 79 1 16116 51 4 4 4 1 2003 3 65 180 . . . . .
## 80 1 16485 51 3 4 3 1 2004 3 109 185 . . . . .
## IDMO
## 70 .
## 71 3
## 72 8
## 73 3
## 74 .
## 75 5
## 76 4
## 77 4
## 78 .
## 79 .
## 80 .
# R uses `NA` for missing information; cells holding "." are turned into `NA`.
is.na(dataA) <- dataA == "."
Data B;
Set A; /*CHAMAR ARQUIVOS PRESENTES NOS DADOS ANTERIORES*/
Proc sort; by Sx; /*SORTEIO DE DADOS DO MAIOR PARA O MENOR OU EM ORDEM ALFABÉTICA*/
Proc print; Run;
# Rows ordered by Sx; no `na.last` is given here, so `order` keeps its default.
dataB <- dataA[order(dataA[["Sx"]]), ]
head(dataB)## Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND PRD MCD
## 910 501D1992 WALKER89 34ZWDM1990 1990 1 11881 5.0 1 12066 43 4 4 5
## 911 501D1999 616D1997 50D1995 1995 1 14449 5.0 1 14526 34 3 4 3
## 912 501D2000 568D1999 44D1992 1992 1 14827 4.9 1 14939 37 2 2 2
## 913 501D2003 559S2002 90D1999 1999 1 15917 4.0 1 16001 35 4 4 5
## 914 501D2004 502S2002 143T2000 2000 1 16284 4.5 1 16409 34 3 3 3
## 915 501D2006 611D2005 93D2001 2001 1 17016 5.0 1 17127 49 3 3 4
## GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC Ano PE IPP
## 910 1 12129 55 4 4 3 2 1992 2 185 248 <NA> <NA> <NA> <NA>
## 911 1 14660 56 4 4 3 2 1999 4 77 211 16 <NA> 35 <NA>
## 912 1 15001 41 2 2 2 2 2000 8 112 174 19 <NA> 25 <NA>
## 913 1 16116 45 3 4 4 2 2003 4 84 199 26 <NA> 31 <NA>
## 914 1 16485 36 2 3 2 2 2004 4 125 201 29 <NA> 34 <NA>
## 915 1 17197 57 4 3 3 2 2006 5 111 181 34 <NA> 34 <NA>
## p180 IDMO
## 910 <NA> <NA>
## 911 56 <NA>
## 912 41 <NA>
## 913 45 <NA>
## 914 36 <NA>
## 915 57 <NA>
Data C; Set A;
Proc sort nodupkey; by Anonasc; /*O NODUPKEY APRESENTA SOMENTE A PRIMEIRA APARIÇÃO DE CADA NÍVEL DA VARIÁVEL SORTEADA*/
Proc print; Run;
# Sort by birth year, rows with a missing year first ...
dataC <- dataA[order(dataA[["anonasc"]], na.last = FALSE), ]
# ... then keep only the first row found for each distinct year.
dataC <- dataC[match(unique(dataC$anonasc), dataC$anonasc), ]
dataC## Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND
## 1477 54D2004 627D2001 23D2002 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 172 11S1992 102Y 42S1987 1987 2 11889 5.6 1 12066 47 5
## 81 10D1993 92-035 54T1991 1991 2 12250 4.0 1 12367 33 4
## 82 10D1994 554S1993 32D1992 1992 2 12636 4.0 1 12719 30 3
## 83 10D1995 4035 15D1991 1991 2 13000 5.0 1 13095 44 5
## 3 100S1996 563S1994 85D1994 1994 2 13369 5.0 1 13461 37 3
## 1 100D1997 563S1994 31D1991 1991 2 13740 4.5 1 13840 22 2
## 4 100S1998 3721 49S1994 1994 2 14114 9.0 2 14204 39 4
## 25 102S1999 572D1996 96S1997 1997 2 14474 5.0 1 14547 27 3
## 8 100T2000 522D1998 30D1994 1994 2 14842 4.0 1 14939 37 2
## 9 100T2001 605D2000 70T1997 1997 2 15215 4.5 3 15306 35 3
## 26 102S2002 650D2000 117D2000 2000 2 15583 5.3 1 15671 40 4
## 2 100D2003 514D2001 15D2000 2000 2 15935 5.8 1 16001 26 3
## 5 100S2004 561S2003 48D2000 2000 2 16298 8.0 1 16409 37 3
## 6 100S2005 539D2004 1D2003 2003 2 16667 4.5 1 16804 16 1
## 7 100S2006 507S2003 59D2003 2003 2 17049 5.0 3 17127 30 3
## 34 103S2007 536S2006 62D2005 2005 2 17404 5.0 1 17477 34 2
## PRD MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC
## 1477 <NA> <NA> <NA> <NA> 45 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 172 5 4 1 12129 55 4 4 4 1 1992 5 177 240 <NA>
## 81 4 3 1 12472 37 3 3 3 2 1993 2 117 222 <NA>
## 82 3 3 1 12822 45 3 4 2 2 1994 2 83 186 <NA>
## 83 5 5 1 13207 56 5 5 4 2 1995 4 95 207 <NA>
## 3 4 3 1 13573 43 4 3 3 1 1996 2 92 204 <NA>
## 1 2 2 1 13947 32 2 2 2 2 1997 6 100 207 <NA>
## 4 3 3 1 14312 65 4 4 4 1 1998 4 90 198 <NA>
## 25 3 3 1 14660 55 3 4 3 1 1999 2 73 186 <NA>
## 8 3 3 1 15001 42 3 3 3 3 2000 6 97 159 <NA>
## 9 4 3 1 15383 51 3 4 3 3 2001 4 91 168 <NA>
## 26 4 4 1 15749 55 5 5 4 1 2002 2 88 166 <NA>
## 2 3 3 1 16116 32 1 1 1 2 2003 3 66 181 <NA>
## 5 3 3 1 16485 43 4 3 4 1 2004 4 111 187 <NA>
## 6 1 1 1 16853 21 1 1 1 1 2005 2 137 186 <NA>
## 7 3 3 1 17197 41 3 3 3 1 2006 3 78 148 <NA>
## 34 3 3 1 17539 48 3 3 3 1 2007 2 73 135 <NA>
## Ano PE IPP p180 IDMO
## 1477 2006 <NA> 24 <NA> 4
## 172 <NA> <NA> <NA> <NA> <NA>
## 81 1996 <NA> 37 <NA> 5
## 82 1996 <NA> 24 <NA> 4
## 83 <NA> <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA> <NA>
## 1 <NA> <NA> <NA> <NA> <NA>
## 4 2000 <NA> 24 <NA> 6
## 25 <NA> <NA> <NA> <NA> <NA>
## 8 2001 <NA> 12 <NA> 7
## 9 2003 <NA> 23 <NA> 6
## 26 2003 <NA> 12 <NA> 3
## 2 <NA> <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA>
## 7 <NA> <NA> <NA> <NA> <NA>
## 34 <NA> <NA> <NA> <NA> <NA>
Data D; Set A;
Proc sort; by anonasc gmjd sx; /*SORTEIO PRIMEIRO POR ANO DE NASCIMENTO, DEPOIS POR GRUPO DE MANEJO E POR ÚLTIMO POR SEXO*/
Proc print; Run;
# Sort keys in priority order: birth year, management group (GMJD), sex.
# Missing keys are placed first.
dataD <- dataA[with(dataA, order(anonasc, GMJD, Sx, na.last = FALSE)), ]
head(dataD)## Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND
## 1477 54D2004 627D2001 23D2002 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2499 667Q1999 4397 52S1993 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 910 501D1992 WALKER89 34ZWDM1990 1990 1 11881 5.0 1 12066 43 4
## 923 502S1992 102Y 37D1989 1989 1 11881 5.5 1 12066 40 4
## 925 503D1992 542D1990 5ZHBK1990 1990 1 11882 3.9 1 12066 37 3
## 934 504S1992 RIDGE 28D1988 1988 1 11884 4.8 1 12066 48 4
## PRD MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC
## 1477 <NA> <NA> <NA> <NA> 45 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2499 <NA> <NA> <NA> <NA> 60 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 18
## 910 4 5 1 12129 55 4 4 3 2 1992 2 185 248 <NA>
## 923 3 3 1 12129 50 3 2 3 1 1992 3 185 248 <NA>
## 925 2 2 1 12129 46 4 3 3 2 1992 2 184 247 <NA>
## 934 5 4 1 12129 58 4 3 3 1 1992 4 182 245 <NA>
## Ano PE IPP p180 IDMO
## 1477 2006 <NA> 24 <NA> 4
## 2499 <NA> 33 <NA> 60 <NA>
## 910 <NA> <NA> <NA> <NA> <NA>
## 923 <NA> <NA> <NA> <NA> <NA>
## 925 <NA> <NA> <NA> <NA> <NA>
## 934 <NA> <NA> <NA> <NA> <NA>
Data E; Set D;
GC1 = anonasc||gmjd||sx; /*VARIÁVEIS QUE VÃO FORMAR O GRUPO DE CONTEMPORÂNEOS*/
Proc sort; by GC1;
Proc print; Run;
dataE <- dataD
# Build GC1 by gluing birth year, management group and sex together.
dataE[["GC1"]] <- with(dataE, paste0(anonasc, GMJD, Sx))
# Pasting turns `NA` into the text "NA", so it no longer behaves as missing.
# A row whose three keys are all missing therefore reads "NANANA"; give those
# rows a real `NA` again.
is.na(dataE$GC1) <- dataE$GC1 == "NANANA"
head(dataE)## Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND
## 1477 54D2004 627D2001 23D2002 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2499 667Q1999 4397 52S1993 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 910 501D1992 WALKER89 34ZWDM1990 1990 1 11881 5.0 1 12066 43 4
## 923 502S1992 102Y 37D1989 1989 1 11881 5.5 1 12066 40 4
## 925 503D1992 542D1990 5ZHBK1990 1990 1 11882 3.9 1 12066 37 3
## 934 504S1992 RIDGE 28D1988 1988 1 11884 4.8 1 12066 48 4
## PRD MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC
## 1477 <NA> <NA> <NA> <NA> 45 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2499 <NA> <NA> <NA> <NA> 60 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 18
## 910 4 5 1 12129 55 4 4 3 2 1992 2 185 248 <NA>
## 923 3 3 1 12129 50 3 2 3 1 1992 3 185 248 <NA>
## 925 2 2 1 12129 46 4 3 3 2 1992 2 184 247 <NA>
## 934 5 4 1 12129 58 4 3 3 1 1992 4 182 245 <NA>
## Ano PE IPP p180 IDMO GC1
## 1477 2006 <NA> 24 <NA> 4 <NA>
## 2499 <NA> 33 <NA> 60 <NA> <NA>
## 910 <NA> <NA> <NA> <NA> <NA> 199211
## 923 <NA> <NA> <NA> <NA> <NA> 199211
## 925 <NA> <NA> <NA> <NA> <NA> 199211
## 934 <NA> <NA> <NA> <NA> <NA> 199211
Data F; Set E;
GC2 = lag(GC1); /*LAG - REPETE UMA VARIÁVEL UMA LINHA ABAIXO. USAMOS BASTANTE PARA COMPARAR DADOS*/
If GC1 not= GC2 then GC3+1; /*PROCEDIMENTO IF E NUMERAÇÃO SEQUENCIAL*/
Proc print; Run;
dataF <- dataE
# Counterpart of the SAS `lag`: each row receives the GC1 of the row above it,
# and the first row, having no predecessor, receives NA.
dataF$GC2 <- c(NA, dataF$GC1[-nrow(dataF)])
# A SAS data step loops over the rows implicitly; mimicking it literally in R
# would need an explicit loop. Instead the groups are renumbered straight from
# GC1, with no need for GC2: each row gets the position of its GC1 among the
# sorted distinct values. NA is kept as a group of its own and, being sorted
# last, receives the highest number.
gc1_values <- unique(dataF$GC1)
gc1_levels <- gc1_values[order(gc1_values)]
dataF$GC3 <- as.numeric(match(dataF$GC1, gc1_levels))
head(dataF)## Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND
## 1477 54D2004 627D2001 23D2002 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2499 667Q1999 4397 52S1993 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 910 501D1992 WALKER89 34ZWDM1990 1990 1 11881 5.0 1 12066 43 4
## 923 502S1992 102Y 37D1989 1989 1 11881 5.5 1 12066 40 4
## 925 503D1992 542D1990 5ZHBK1990 1990 1 11882 3.9 1 12066 37 3
## 934 504S1992 RIDGE 28D1988 1988 1 11884 4.8 1 12066 48 4
## PRD MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC
## 1477 <NA> <NA> <NA> <NA> 45 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 2499 <NA> <NA> <NA> <NA> 60 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 18
## 910 4 5 1 12129 55 4 4 3 2 1992 2 185 248 <NA>
## 923 3 3 1 12129 50 3 2 3 1 1992 3 185 248 <NA>
## 925 2 2 1 12129 46 4 3 3 2 1992 2 184 247 <NA>
## 934 5 4 1 12129 58 4 3 3 1 1992 4 182 245 <NA>
## Ano PE IPP p180 IDMO GC1 GC2 GC3
## 1477 2006 <NA> 24 <NA> 4 <NA> <NA> 114
## 2499 <NA> 33 <NA> 60 <NA> <NA> <NA> 114
## 910 <NA> <NA> <NA> <NA> <NA> 199211 <NA> 1
## 923 <NA> <NA> <NA> <NA> <NA> 199211 199211 1
## 925 <NA> <NA> <NA> <NA> <NA> 199211 199211 1
## 934 <NA> <NA> <NA> <NA> <NA> 199211 199211 1
Data G; Set F;
Proc freq; Table GC3/out=x; /*NOVA MEMÓRIA COM A SAÍDA "X", ONDE O NGC É A CONTAGEM QUE FOI FEITA - CONTAR ANIMAIS DENTRO DE GC*/
Run;
table(dataF$GC3)##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 53 48 9 9 7 4 3 1 31 39 22 19 2 1 1 1 65 56 13 13
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 6 8 1 52 43 16 19 7 7 2 92 97 13 14 13 7 1 3 3 1
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 76 91 10 7 14 6 6 4 55 56 27 25 11 7 4 6 99 94 9 12
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 22 18 67 92 11 14 18 24 2 7 51 54 8 14 7 9 61 55 8 7
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 4 5 1 57 86 10 7 20 37 2 6 93 89 10 13 13 16 66 85 2
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114
## 3 17 15 1 2 95 84 2 10 50 63 2 2 2
# Keep the frequency table of GC3 as a data.frame named x.
x <- as.data.frame(table(dataF[["GC3"]]))
Data H; Set X;
NGC = count; /*VARIÁVEL CONTADORA*/
Drop count percent; /*DROP - COMANDO PARA RETIRAR COLUNAS QUE NÃO SERÃO MAIS NECESSÁRIAS*/
Proc print; Run;
# Rename the two columns of x: the group id and the number of animals in it.
x <- setNames(x, c("GC3", "NGC"))
head(x)## GC3 NGC
## 1 1 53
## 2 2 48
## 3 3 9
## 4 4 9
## 5 5 7
## 6 6 4
Data I;
Merge F H; By GC3; /*MERGE - JUNTAR DADOS COLOCANDO AS COLUNAS LADO A LADO*/
If NGC < 15 then delete;
Proc print; Run;
# No dataG was needed because the counts were stored in x, and no dataH either
# because `table` only produces the absolute frequency, which is what matters
# here. The equivalent of DROP is shown further on.
# Attach the group size (NGC) to every row of dataF, matching on GC3 ...
dataI <- merge(x = dataF, y = x, by = "GC3", all.x = TRUE)
# ... and drop the rows whose group has fewer than 15 animals.
dataI <- dataI[dataI$NGC >= 15, ]
head(dataI)## GC3 Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND PRD
## 1 1 501D1992 WALKER89 34ZWDM1990 1990 1 11881 5.0 1 12066 43 4 4
## 2 1 502S1992 102Y 37D1989 1989 1 11881 5.5 1 12066 40 4 3
## 3 1 503D1992 542D1990 5ZHBK1990 1990 1 11882 3.9 1 12066 37 3 2
## 4 1 504S1992 RIDGE 28D1988 1988 1 11884 4.8 1 12066 48 4 5
## 5 1 505S1992 RIDGE 34D1987 1987 1 11887 7.0 1 12066 48 4 4
## 6 1 506D1992 542D1990 39D1990 1990 1 11888 3.5 1 12066 40 4 4
## MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC Ano PE
## 1 5 1 12129 55 4 4 3 2 1992 2 185 248 <NA> <NA> <NA>
## 2 3 1 12129 50 3 2 3 1 1992 3 185 248 <NA> <NA> <NA>
## 3 2 1 12129 46 4 3 3 2 1992 2 184 247 <NA> <NA> <NA>
## 4 4 1 12129 58 4 3 3 1 1992 4 182 245 <NA> <NA> <NA>
## 5 4 1 12129 57 3 3 3 1 1992 5 179 242 <NA> <NA> <NA>
## 6 4 1 12129 49 3 4 3 2 1992 2 178 241 <NA> <NA> <NA>
## IPP p180 IDMO GC1 GC2 NGC
## 1 <NA> <NA> <NA> 199211 <NA> 53
## 2 <NA> <NA> <NA> 199211 199211 53
## 3 <NA> <NA> <NA> 199211 199211 53
## 4 <NA> <NA> <NA> 199211 199211 53
## 5 <NA> <NA> <NA> 199211 199211 53
## 6 <NA> <NA> <NA> 199211 199211 53
Data J; Set I;
GC4 = lag(NGC);
If NGC not= GC4 then GC+1;
Proc print; Run;
# Open question left by the author (originally marked "???"): how to reproduce
# the SAS step `If NGC not= GC4 then GC+1`. The alternative below was tried and
# left commented out.
#dataJ$GC <- as.numeric(factor(dataI$NGC, levels = unique(dataI$NGC), exclude = NULL))
dataJ <- dataI
# GC4 is NGC shifted down one row (NA for the first row), like the SAS `lag`.
dataJ$GC4 <- c(NA, head(dataJ$NGC, -1))
# A row "starts a run" when it is the first row or its NGC differs from the
# row above. Only those rows receive a new GC: a counter 1, 2, 3, ... in row
# order. Every other row keeps whatever GC it already had.
# Vectorised replacement for the former `for (i in 1:nrow(dataJ))` loop: it
# yields the same values, also behaves on an empty data frame, and avoids the
# elementwise `|` inside a scalar `if`.
# NOTE(review): this may not match what SAS computes for `GC+1`; the text after
# the model output reports that the contemporary groups differ - confirm.
starts_run <- is.na(dataJ$GC4) | dataJ$NGC != dataJ$GC4
dataJ$GC[starts_run] <- seq_len(sum(starts_run))
head(dataJ)## GC3 Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND PRD
## 1 1 501D1992 WALKER89 34ZWDM1990 1990 1 11881 5.0 1 12066 43 4 4
## 2 1 502S1992 102Y 37D1989 1989 1 11881 5.5 1 12066 40 4 3
## 3 1 503D1992 542D1990 5ZHBK1990 1990 1 11882 3.9 1 12066 37 3 2
## 4 1 504S1992 RIDGE 28D1988 1988 1 11884 4.8 1 12066 48 4 5
## 5 1 505S1992 RIDGE 34D1987 1987 1 11887 7.0 1 12066 48 4 4
## 6 1 506D1992 542D1990 39D1990 1990 1 11888 3.5 1 12066 40 4 4
## MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC Ano PE
## 1 5 1 12129 55 4 4 3 2 1992 2 185 248 1 <NA> <NA>
## 2 3 1 12129 50 3 2 3 1 1992 3 185 248 <NA> <NA> <NA>
## 3 2 1 12129 46 4 3 3 2 1992 2 184 247 <NA> <NA> <NA>
## 4 4 1 12129 58 4 3 3 1 1992 4 182 245 <NA> <NA> <NA>
## 5 4 1 12129 57 3 3 3 1 1992 5 179 242 <NA> <NA> <NA>
## 6 4 1 12129 49 3 4 3 2 1992 2 178 241 <NA> <NA> <NA>
## IPP p180 IDMO GC1 GC2 NGC GC4
## 1 <NA> <NA> <NA> 199211 <NA> 53 NA
## 2 <NA> <NA> <NA> 199211 199211 53 53
## 3 <NA> <NA> <NA> 199211 199211 53 53
## 4 <NA> <NA> <NA> 199211 199211 53 53
## 5 <NA> <NA> <NA> 199211 199211 53 53
## 6 <NA> <NA> <NA> 199211 199211 53 53
Data K; Set J;
Proc GLM; /*ANÁLISE DE VARIÂNCIA DE DADOS DESBALANCEADOS*/
Class GC; /*EFEITOS FIXOS*/
Model PE = GC ids /solution; /*O SOLUTION PERMITE QUE O PROGRAMA APRESENTE AS SOLUÇÕES DOS NÍVEIS DOS EFEITOS FIXOS,
APRESENTA O INTERCEPTO E OS BETAS PARA A REGRESSÃO*/
Run;
# Declare the type of each variable entering the model.
# GC becomes a factor: the counterpart of listing it under `Class` in SAS
# Proc GLM.
dataJ[["GC"]] <- factor(as.numeric(dataJ[["GC"]]))
# The covariate ids and the response PE are converted to numeric.
dataJ[c("ids", "PE")] <- lapply(dataJ[c("ids", "PE")], as.numeric)
# The fitted model returned by `lm` is stored in dataK.
dataK <- lm(PE ~ GC + ids, data = dataJ)
# dataK now holds the solutions of the model.
# em dataK já teremos então as soluções do modelo
dataK##
## Call:
## lm(formula = PE ~ GC + ids, data = dataJ)
##
## Coefficients:
## (Intercept) GC2 GC3 GC5 GC6 GC7
## 9.43055 -1.48008 3.64397 2.71082 3.69764 1.04647
## GC9 GC10 GC11 GC13 GC14 GC15
## 1.75648 2.97288 3.86439 5.00801 4.56749 1.81403
## GC16 GC17 GC18 GC19 GC21 GC22
## 4.73082 3.71690 6.10163 6.01263 4.28046 3.42015
## GC23 GC24 GC25 GC26 GC27 GC28
## 8.25288 3.30348 -1.32985 6.41514 12.51547 3.22941
## GC29 GC31 GC32 GC33 GC34 GC35
## 4.92878 5.72770 3.98785 1.48033 2.14356 7.28166
## GC37 GC40 GC42 GC44 ids
## 5.04784 -0.95216 3.08741 6.99029 0.09712
# para mais detalhes usamos as funções `summary`
summary(dataK)##
## Call:
## lm(formula = PE ~ GC + ids, data = dataJ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.5212 -1.6677 0.3553 2.0257 8.7321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.43055 2.83244 3.329 0.000900 ***
## GC2 -1.48008 1.21013 -1.223 0.221575
## GC3 3.64397 0.83209 4.379 1.31e-05 ***
## GC5 2.71082 0.79991 3.389 0.000728 ***
## GC6 3.69764 1.24873 2.961 0.003134 **
## GC7 1.04647 0.70094 1.493 0.135751
## GC9 1.75648 3.38775 0.518 0.604234
## GC10 2.97288 0.72156 4.120 4.09e-05 ***
## GC11 3.86439 3.42193 1.129 0.259029
## GC13 5.00801 0.75632 6.622 5.67e-11 ***
## GC14 4.56749 0.91255 5.005 6.54e-07 ***
## GC15 1.81403 3.39324 0.535 0.593040
## GC16 4.73082 0.72727 6.505 1.20e-10 ***
## GC17 3.71690 3.39406 1.095 0.273715
## GC18 6.10163 1.17128 5.209 2.28e-07 ***
## GC19 6.01263 0.94286 6.377 2.70e-10 ***
## GC21 4.28046 1.25734 3.404 0.000688 ***
## GC22 3.42015 0.87816 3.895 0.000105 ***
## GC23 8.25288 3.42987 2.406 0.016293 *
## GC24 3.30348 0.84497 3.910 9.84e-05 ***
## GC25 -1.32985 3.41826 -0.389 0.697323
## GC26 6.41514 0.80051 8.014 2.95e-15 ***
## GC27 12.51547 3.46115 3.616 0.000313 ***
## GC28 3.22941 1.12146 2.880 0.004062 **
## GC29 4.92878 0.73679 6.690 3.64e-11 ***
## GC31 5.72770 3.39277 1.688 0.091670 .
## GC32 3.98785 0.79023 5.046 5.30e-07 ***
## GC33 1.48033 1.18100 1.253 0.210322
## GC34 2.14356 0.92502 2.317 0.020679 *
## GC35 7.28166 3.41153 2.134 0.033040 *
## GC37 5.04784 3.38871 1.490 0.136629
## GC40 -0.95216 3.38871 -0.281 0.778782
## GC42 3.08741 3.40847 0.906 0.365247
## GC44 6.99029 3.40701 2.052 0.040444 *
## ids 0.09712 0.01326 7.327 4.69e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.332 on 1046 degrees of freedom
## (1429 observations deleted due to missingness)
## Multiple R-squared: 0.3346, Adjusted R-squared: 0.3129
## F-statistic: 15.47 on 34 and 1046 DF, p-value: < 2.2e-16
# a soma de quadrados é do tipo sequencial, equivalente ao Type I do GLM do SAS
# Para ter a soma de quadrados equivalente ao tipo III do GLM do SAS basta mudar a ordem das variáveis no modelo e utilizar a soma de quadrados da última variável (VÁLIDO SOMENTE COM MODELOS SEM INTERAÇÃO).
anova(dataK)## Analysis of Variance Table
##
## Response: PE
## Df Sum Sq Mean Sq F value Pr(>F)
## GC 33 5242.7 158.87 14.309 < 2.2e-16 ***
## ids 1 596.1 596.09 53.688 4.691e-13 ***
## Residuals 1046 11613.6 11.10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Ou usando a função drop1 e solicitando o teste F.
drop1(dataK, test = "F")## Single term deletions
##
## Model:
## PE ~ GC + ids
## Df Sum of Sq RSS AIC F value Pr(>F)
## <none> 11614 2636.6
## GC 33 3261.1 14875 2838.1 8.9006 < 2.2e-16 ***
## ids 1 596.1 12210 2688.7 53.6877 4.691e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Incompatibilidade dos resultados devido ao problema na formação dos grupos contemporâneos. Observe a diferença de graus de liberdade para este fator.
Data L; Set J;
PEIDS = 17.08775211 + (0.09343783*IDS); /*REGRESSÃO PARA TER O PERÍMETRO PREDITO*/
Output;
Do IDS = 128 TO 265 BY 1; /*PARA PROCURAR CADA NÍVEL DA VARIÁVEL QUE VAI SER USADA NA CORREÇÃO E COLOCAR O PERÍMETRO PREDITO*/
End;
Proc print; Run;
### R
dataL <- dataJ
# PEIDS is the value predicted from ids by the regression line below
# (intercept 17.08775211, slope 0.09343783), computed for every row.
# The original followed this with a second assignment of the very same formula
# to the rows with ids between 128 and 265. It changed no value, so it was
# removed: the first assignment already covers every row, and the summary of
# ids further down in this document shows a minimum of 128 and a maximum of 265.
dataL$PEIDS <- 17.08775211 + (0.09343783 * dataL$ids)
head(dataL)## GC3 Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND PRD
## 1 1 501D1992 WALKER89 34ZWDM1990 1990 1 11881 5.0 1 12066 43 4 4
## 2 1 502S1992 102Y 37D1989 1989 1 11881 5.5 1 12066 40 4 3
## 3 1 503D1992 542D1990 5ZHBK1990 1990 1 11882 3.9 1 12066 37 3 2
## 4 1 504S1992 RIDGE 28D1988 1988 1 11884 4.8 1 12066 48 4 5
## 5 1 505S1992 RIDGE 34D1987 1987 1 11887 7.0 1 12066 48 4 4
## 6 1 506D1992 542D1990 39D1990 1990 1 11888 3.5 1 12066 40 4 4
## MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC Ano PE IPP
## 1 5 1 12129 55 4 4 3 2 1992 2 185 248 1 <NA> NA <NA>
## 2 3 1 12129 50 3 2 3 1 1992 3 185 248 <NA> <NA> NA <NA>
## 3 2 1 12129 46 4 3 3 2 1992 2 184 247 <NA> <NA> NA <NA>
## 4 4 1 12129 58 4 3 3 1 1992 4 182 245 <NA> <NA> NA <NA>
## 5 4 1 12129 57 3 3 3 1 1992 5 179 242 <NA> <NA> NA <NA>
## 6 4 1 12129 49 3 4 3 2 1992 2 178 241 <NA> <NA> NA <NA>
## p180 IDMO GC1 GC2 NGC GC4 PEIDS
## 1 <NA> <NA> 199211 <NA> 53 NA 40.26033
## 2 <NA> <NA> 199211 199211 53 53 40.26033
## 3 <NA> <NA> 199211 199211 53 53 40.16690
## 4 <NA> <NA> 199211 199211 53 53 39.98002
## 5 <NA> <NA> 199211 199211 53 53 39.69971
## 6 <NA> <NA> 199211 199211 53 53 39.60627
DATA M; SET L;
PROC MEANS; VAR IDS;
PROC SORT; BY IDS; /*SORTEAR PARA ACHAR O ANIMAL COM O VALOR PADRÃO DE IDADE, OU SEJA, O VALOR PARA QUAL SE QUER CORRIGIR*/
PROC PRINT; RUN;
summary(dataL$ids)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 128.0 171.0 186.0 186.9 202.0 265.0
length(dataL$ids) # N## [1] 2510
mean(dataL$ids) # Mean## [1] 186.8749
sd(dataL$ids) # Std Dev## [1] 21.64507
min(dataL$ids) # Minimum## [1] 128
max(dataL$ids) # Maximum## [1] 265
# Rows of dataL ordered by ids.
dataM <- dataL[order(dataL[["ids"]]), ]
dataM[dataM$ids == 180, ]## GC3 Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND PRD
## 287 17 553S1994 504S1993 45D1989 1989 1 12642 6.3 1 12719 34 3 3
## 288 17 554D1994 143 04721992 1992 1 12642 6.0 1 12719 32 2 2
## 340 18 40S1994 141 82D1991 1991 2 12642 4.9 1 12719 34 3 4
## 341 18 41D1994 143 42D1992 1992 2 12642 3.8 1 12719 25 2 3
## 342 18 42D1994 143 42D1992 1992 2 12642 3.7 1 12719 30 3 3
## 343 18 43S1994 92-035 32D1990 1990 2 12642 7.0 1 12719 39 4 5
## 502 25 60S1995 3721 34D1987 1987 2 13027 7.0 1 13095 18 1 1
## 503 25 61S1995 4035 22S1993 1993 2 13027 7.2 1 13095 37 4 5
## 1153 51 648S1998 3721 6D1996 1996 1 14132 9.8 2 14204 25 2 2
## 1154 51 649S1998 572D1996 15D1991 1991 1 14132 7.5 2 14204 33 3 4
## 1171 52 123S1998 524T1996 23S1991 1991 2 14132 6.0 2 14204 21 2 2
## 1314 58 112D1999 595D1997 32D1992 1992 2 14480 4.0 1 14547 23 4 5
## 1717 71 552S2001 540D2000 117S1996 1996 1 15203 2.0 1 15306 46 4 4
## 1769 72 37D2001 650D2000 144S1999 1999 2 15203 4.0 1 15306 35 3 4
## 1770 72 38D2001 650D2000 144S1999 1999 2 15203 3.5 1 15306 40 4 5
## 1771 72 39D2001 605D2000 4D1998 1998 2 15203 5.7 1 15306 37 4 4
## 1864 77 545D2002 627D2001 86D1997 1997 1 15569 5.2 1 15671 34 3 3
## 1865 77 546D2002 627D2001 86D1997 1997 1 15569 5.0 1 15671 33 2 2
## 1921 78 38S2002 606S2001 95D2000 2000 2 15569 4.0 1 15671 38 3 4
## 1922 78 39S2002 650D2000 64D1999 1999 2 15569 7.2 1 15671 30 3 3
## 2036 84 597D2003 540D2000 76T1997 1997 1 15936 4.7 1 16001 28 2 2
## 2042 85 107S2003 627D2001 46D2001 2001 2 15936 5.8 1 16001 31 4 4
## 2043 85 108D2003 540D2000 76T1997 1997 2 15936 5.0 1 16001 23 3 3
## 2044 85 109S2003 504S2002 118S2000 2000 2 15936 9.0 1 16001 25 2 2
## 2045 85 110D2003 514D2001 94T2000 2000 2 15936 2.7 1 16001 19 1 2
## 2046 85 112D2003 502S2002 1D2000 2000 2 15936 4.5 1 16001 23 2 2
## 2047 85 113D2003 502S2002 1D2000 2000 2 15936 5.2 1 16001 25 2 2
## 2294 92 646S2004 627D2001 6D2002 2002 1 16305 7.0 1 16409 33 2 2
## 2317 93 130S2004 444 74T2003 2003 2 16305 3.0 1 16409 35 4 4
## 2318 93 131S2004 627D2001 55D2002 2002 2 16305 8.0 1 16409 25 1 1
## 2596 102 616S2005 559S2002 86D1999 1999 1 16672 5.0 3 16803 29 3 2
## 2597 102 617D2005 561S2003 105S2001 2001 1 16672 6.0 3 16803 21 1 1
## 2598 102 620S2005 561S2003 123S2000 2000 1 16672 7.0 3 16803 34 2 2
## 2754 107 2D2006 602D2005 7D2004 2004 2 17017 3.0 1 17127 32 4 3
## 2764 107 3D2006 602D2005 7D2004 2004 2 17017 3.5 1 17127 30 3 3
## 2917 111 7S2007 524D2005 104D2003 2003 2 17359 6.0 1 17477 50 5 5
## MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC Ano PE
## 287 3 1 12822 40 1 1 1 1 1994 5 77 180 3 <NA> 26
## 288 2 1 12822 54 3 3 3 2 1994 2 77 180 3 <NA> 28
## 340 4 1 12822 54 4 5 4 1 1994 3 77 180 <NA> <NA> NA
## 341 3 1 12822 38 2 3 3 2 1994 2 77 180 <NA> 1996 NA
## 342 3 1 12822 43 3 3 3 2 1994 2 77 180 <NA> <NA> NA
## 343 5 1 12822 49 4 4 4 1 1994 4 77 180 <NA> 1996 NA
## 502 1 1 13207 34 2 2 2 1 1995 8 68 180 <NA> 1997 NA
## 503 4 1 13207 54 4 4 3 1 1995 2 68 180 <NA> 1997 NA
## 1153 2 1 14312 43 2 2 2 1 1998 2 72 180 14 <NA> 24
## 1154 4 1 14312 64 3 4 3 1 1998 7 72 180 14 <NA> 37
## 1171 2 1 14312 46 3 4 3 1 1998 7 72 180 <NA> 2001 NA
## 1314 4 1 14660 57 3 4 3 2 1999 7 67 180 <NA> 2001 NA
## 1717 4 1 15383 60 4 4 4 1 2001 5 103 180 22 <NA> 33
## 1769 3 1 15383 51 4 4 4 2 2001 2 103 180 <NA> <NA> NA
## 1770 5 1 15383 58 5 5 4 2 2001 2 103 180 <NA> 2003 NA
## 1771 4 1 15383 55 4 5 4 2 2001 3 103 180 <NA> 2003 NA
## 1864 3 1 15749 50 3 4 3 2 2002 5 102 180 24 <NA> 32
## 1865 2 1 15749 47 4 5 4 2 2002 5 102 180 24 <NA> 30
## 1921 4 1 15749 39 3 4 4 1 2002 2 102 180 <NA> 2004 NA
## 1922 3 1 15749 43 3 3 3 1 2002 3 102 180 <NA> 2004 NA
## 2036 2 1 16116 59 4 4 4 2 2003 6 65 180 26 <NA> 36
## 2042 4 1 16116 62 4 5 5 1 2003 2 65 180 <NA> <NA> NA
## 2043 3 1 16116 40 2 2 2 2 2003 6 65 180 <NA> 2005 NA
## 2044 2 1 16116 51 4 4 4 1 2003 3 65 180 <NA> <NA> NA
## 2045 2 1 16116 44 2 4 4 2 2003 3 65 180 <NA> 2005 NA
## 2046 3 1 16116 47 3 4 4 2 2003 3 65 180 <NA> <NA> NA
## 2047 2 1 16116 48 3 3 3 2 2003 3 65 180 <NA> 2005 NA
## 2294 2 1 16485 30 2 1 3 1 2004 2 104 180 <NA> <NA> NA
## 2317 3 1 16485 39 2 2 2 1 2004 1 104 180 <NA> <NA> NA
## 2318 1 1 16485 31 1 1 1 1 2004 2 104 180 <NA> <NA> NA
## 2596 2 1 16852 40 3 2 3 1 2005 6 131 180 42 <NA> 30
## 2597 2 1 16852 37 2 2 2 2 2005 4 131 180 33 <NA> 24
## 2598 3 1 16852 56 4 4 5 1 2005 5 131 180 33 <NA> 35
## 2754 4 1 17197 42 4 5 5 2 2006 2 110 180 <NA> <NA> NA
## 2764 3 1 17197 36 2 3 3 2 2006 2 110 180 <NA> <NA> NA
## 2917 5 1 17539 68 5 5 5 1 2007 4 118 180 <NA> <NA> NA
## IPP p180 IDMO GC1 GC2 NGC GC4 PEIDS
## 287 <NA> 40 <NA> 199411 199411 65 65 33.90656
## 288 <NA> 54 <NA> 199411 199411 65 65 33.90656
## 340 <NA> <NA> <NA> 199412 199412 56 56 33.90656
## 341 23 <NA> 4 199412 199412 56 56 33.90656
## 342 <NA> <NA> <NA> 199412 199412 56 56 33.90656
## 343 25 <NA> 6 199412 199412 56 56 33.90656
## 502 23 <NA> 10 199512 199512 43 43 33.90656
## 503 23 <NA> 4 199512 199512 43 43 33.90656
## 1153 <NA> 43 <NA> 199821 199821 27 27 33.90656
## 1154 <NA> 64 <NA> 199821 199821 27 27 33.90656
## 1171 36 <NA> 10 199822 199822 25 25 33.90656
## 1314 24 <NA> 9 199912 199912 94 94 33.90656
## 1717 <NA> 60 <NA> 200111 200111 51 51 33.90656
## 1769 <NA> <NA> <NA> 200112 200112 54 54 33.90656
## 1770 24 <NA> 4 200112 200112 54 54 33.90656
## 1771 24 <NA> 5 200112 200112 54 54 33.90656
## 1864 <NA> 50 <NA> 200211 200211 61 61 33.90656
## 1865 <NA> 47 <NA> 200211 200211 61 61 33.90656
## 1921 23 <NA> 4 200212 200212 55 55 33.90656
## 1922 24 <NA> 5 200212 200212 55 55 33.90656
## 2036 <NA> 59 <NA> 200311 200311 57 57 33.90656
## 2042 <NA> <NA> <NA> 200312 200312 86 86 33.90656
## 2043 24 <NA> 8 200312 200312 86 86 33.90656
## 2044 <NA> <NA> <NA> 200312 200312 86 86 33.90656
## 2045 24 <NA> 5 200312 200312 86 86 33.90656
## 2046 <NA> <NA> <NA> 200312 200312 86 86 33.90656
## 2047 24 <NA> 5 200312 200312 86 86 33.90656
## 2294 <NA> <NA> <NA> 200411 200411 93 93 33.90656
## 2317 <NA> <NA> <NA> 200412 200412 89 89 33.90656
## 2318 <NA> <NA> <NA> 200412 200412 89 89 33.90656
## 2596 <NA> 40 <NA> 200531 200522 17 85 33.90656
## 2597 <NA> 37 <NA> 200531 200531 17 17 33.90656
## 2598 <NA> 56 <NA> 200531 200531 17 17 33.90656
## 2754 <NA> <NA> <NA> 200612 200612 84 84 33.90656
## 2764 <NA> <NA> <NA> 200612 200612 84 84 33.90656
## 2917 <NA> <NA> <NA> 200712 200712 63 63 33.90656
DATA N; SET M;
FCIDS = 33.9066/PEIDS; /*O FATOR DE CORREÇÃO SERÁ O PERÍMETRO DO PADRÃO DIVIDIDO PELO PERÍMETRO PREDITO DO ANIMAL*/
Proc print; RUN;
dataN <- dataM
unique(dataM[dataM$ids == 180, "PEIDS"])## [1] 33.90656
dataN$FCIDS <- 33.9066/dataN$PEIDS
dataN[923:936, ]## GC3 Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND PRD
## 2772 107 4D2006 538D2004 71T2001 2001 2 17018 6.0 1 17127 33 3 4
## 2829 110 511S2007 611D2005 105D2004 2004 1 17360 6.5 1 17477 44 5 5
## 2830 110 512S2007 524D2005 47D2002 2002 1 17360 6.5 1 17477 52 4 3
## 287 17 553S1994 504S1993 45D1989 1989 1 12642 6.3 1 12719 34 3 3
## 288 17 554D1994 143 04721992 1992 1 12642 6.0 1 12719 32 2 2
## 340 18 40S1994 141 82D1991 1991 2 12642 4.9 1 12719 34 3 4
## 341 18 41D1994 143 42D1992 1992 2 12642 3.8 1 12719 25 2 3
## 342 18 42D1994 143 42D1992 1992 2 12642 3.7 1 12719 30 3 3
## 343 18 43S1994 92-035 32D1990 1990 2 12642 7.0 1 12719 39 4 5
## 502 25 60S1995 3721 34D1987 1987 2 13027 7.0 1 13095 18 1 1
## 503 25 61S1995 4035 22S1993 1993 2 13027 7.2 1 13095 37 4 5
## 1153 51 648S1998 3721 6D1996 1996 1 14132 9.8 2 14204 25 2 2
## 1154 51 649S1998 572D1996 15D1991 1991 1 14132 7.5 2 14204 33 3 4
## 1171 52 123S1998 524T1996 23S1991 1991 2 14132 6.0 2 14204 21 2 2
## MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC Ano PE
## 2772 3 1 17197 35 2 3 4 2 2006 5 109 179 <NA> <NA> NA
## 2829 5 1 17539 58 5 5 5 1 2007 3 117 179 <NA> <NA> NA
## 2830 5 1 17539 64 5 5 5 1 2007 5 117 179 <NA> <NA> NA
## 287 3 1 12822 40 1 1 1 1 1994 5 77 180 3 <NA> 26
## 288 2 1 12822 54 3 3 3 2 1994 2 77 180 3 <NA> 28
## 340 4 1 12822 54 4 5 4 1 1994 3 77 180 <NA> <NA> NA
## 341 3 1 12822 38 2 3 3 2 1994 2 77 180 <NA> 1996 NA
## 342 3 1 12822 43 3 3 3 2 1994 2 77 180 <NA> <NA> NA
## 343 5 1 12822 49 4 4 4 1 1994 4 77 180 <NA> 1996 NA
## 502 1 1 13207 34 2 2 2 1 1995 8 68 180 <NA> 1997 NA
## 503 4 1 13207 54 4 4 3 1 1995 2 68 180 <NA> 1997 NA
## 1153 2 1 14312 43 2 2 2 1 1998 2 72 180 14 <NA> 24
## 1154 4 1 14312 64 3 4 3 1 1998 7 72 180 14 <NA> 37
## 1171 2 1 14312 46 3 4 3 1 1998 7 72 180 <NA> 2001 NA
## IPP p180 IDMO GC1 GC2 NGC GC4 PEIDS FCIDS
## 2772 <NA> <NA> <NA> 200612 200612 84 84 33.81312 1.002764
## 2829 <NA> <NA> <NA> 200711 200711 50 50 33.81312 1.002764
## 2830 <NA> <NA> <NA> 200711 200711 50 50 33.81312 1.002764
## 287 <NA> 40 <NA> 199411 199411 65 65 33.90656 1.000001
## 288 <NA> 54 <NA> 199411 199411 65 65 33.90656 1.000001
## 340 <NA> <NA> <NA> 199412 199412 56 56 33.90656 1.000001
## 341 23 <NA> 4 199412 199412 56 56 33.90656 1.000001
## 342 <NA> <NA> <NA> 199412 199412 56 56 33.90656 1.000001
## 343 25 <NA> 6 199412 199412 56 56 33.90656 1.000001
## 502 23 <NA> 10 199512 199512 43 43 33.90656 1.000001
## 503 23 <NA> 4 199512 199512 43 43 33.90656 1.000001
## 1153 <NA> 43 <NA> 199821 199821 27 27 33.90656 1.000001
## 1154 <NA> 64 <NA> 199821 199821 27 27 33.90656 1.000001
## 1171 36 <NA> 10 199822 199822 25 25 33.90656 1.000001
DATA O; SET N;
PEFCIDS = PE*FCIDS; /*PERÍMETRO CORRIGIDO*/
PROC SORT; BY IDS;
Proc print; RUN;
# Corrected perimeter: the observed PE multiplied by the correction factor FCIDS.
dataO <- dataN
dataO[["PEFCIDS"]] <- with(dataO, PE * FCIDS)
# Rows in ascending order of ids (counterpart of PROC SORT; BY IDS).
dataO <- dataO[order(dataO[["ids"]]), ]
dataO[19:24,]## GC3 Animal Pai Mae NascMae Sx DN PN GMJD DD PD CND
## 2933 111 99D2007 536S2006 29D2004 2004 2 17399 5.5 1 17477 30 3
## 226 11 590S1993 0555 92-0511992 1992 1 12331 7.5 2 12408 24 2
## 2931 111 95D2007 555S2006 60D2001 2001 2 17398 6.5 1 17477 20 1
## 2721 106 626D2006 538D2004 77D2004 2004 1 17055 5.0 1 17127 23 3
## 2722 106 628S2006 555D2005 64D2003 2003 1 17055 6.0 1 17127 29 2
## 2723 106 629D2006 507S2003 82D2001 2001 1 17055 5.0 1 17127 18 1
## PRD MCD GMJS DS PS CNS PRS MCS TParto anonasc idmae idd ids GC Ano
## 2933 2 2 1 17539 41 4 4 4 2 2007 3 78 140 <NA> <NA>
## 226 2 3 1 12472 27 1 1 2 1 1993 1 77 141 2 <NA>
## 2931 1 1 1 17539 35 2 2 2 2 2007 6 79 141 <NA> <NA>
## 2721 3 2 1 17197 47 2 2 2 2 2006 2 72 142 34 <NA>
## 2722 2 3 1 17197 40 3 3 1 1 2006 3 72 142 34 <NA>
## 2723 1 1 1 17197 27 1 1 1 2 2006 5 72 142 <NA> <NA>
## PE IPP p180 IDMO GC1 GC2 NGC GC4 PEIDS FCIDS PEFCIDS
## 2933 NA <NA> <NA> <NA> 200712 200712 63 63 30.16905 1.123887 NA
## 226 22 <NA> 27 <NA> 199321 199321 22 22 30.26249 1.120417 24.64917
## 2931 NA <NA> <NA> <NA> 200712 200712 63 63 30.26249 1.120417 NA
## 2721 26 <NA> 47 <NA> 200611 200611 95 95 30.35592 1.116968 29.04117
## 2722 23 <NA> 40 <NA> 200611 200611 95 95 30.35592 1.116968 25.69027
## 2723 NA <NA> <NA> <NA> 200611 200611 95 95 30.35592 1.116968 NA
Data P; Set O;
File "C:\Users\bah_m\OneDrive\Documentos2\Doutorado\Disciplinas\Grupo de estudos_R\Ovinos2.dat";
Put Animal$ 1-12 Pai$ 14-25 Mae$ 27-38 NascMae 40-43 Sx$ 45 DN 47-53 PN 56-58 .1 GMJD$ 60 DD 62-69 PD 71-72 CND 74 PRD 76
MCD 78 GMJS$ 80 DS 82-89 PS 91-93 CNS 95 PRS 97 MCS 99 TParto 101 anonasc 103-106 idmae 108-109 idd 111-113 ids 115-117
Ano 119-121 PE 123-124 IPP 126-127 p180 129-130 IDMO 132-133 GC 135-136 PEIDS 138-142 .2 FCIDS 144-150 .5 PEFCIDS 152-156 .2 ;
run;
# Para salvar arquivos com largura fixa precisaremos de um pacote
# Instalar o pacote:
#install.packages("gdata")
# "Chamar" o pacote
require(gdata)## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
# Columns written to the fixed-width file, listed once and reused by both calls below.
colunasOvinos2 <- c(
  "Animal", "Pai", "Mae", "NascMae", "Sx", "DN", "PN", "GMJD", "DD", "PD",
  "CND", "PRD", "MCD", "GMJS", "DS", "PS", "CNS", "PRS", "MCS", "TParto",
  "anonasc", "idmae", "idd", "ids", "Ano", "PE", "IPP", "p180", "IDMO", "GC",
  "PEIDS", "FCIDS", "PEFCIDS"
)
write.fwf(dataO[, colunasOvinos2], file = "Ovinos2.dat")
# The width can be given for each variable or, relying on R's recycling rule, as a single value used for every column
write.fwf(dataO[, colunasOvinos2], file = "Ovinos2.dat", width = 13)
PROC IMPORT OUT= WORK.A
DATAFILE= "C:\Users\bah_m\Desktop\Cláudia Zago\SAS_R_Andre\data_inicial.xlsx"
DBMS=EXCELCS REPLACE;
RANGE="data_inicial";
SCANTEXT=YES;
USEDATE=YES;
SCANTIME=YES;
RUN; /* 2997 observations and 187 variables */
# Usando pacote readxl. Para instalar, utilize install.packages("readxl")
dataA <- as.data.frame(readxl::read_excel("data_inicial.xlsx", sheet = "data_inicial")) # além de ler caracteres como caracteres mesmo, já entende as datas corretamente.## Warning in read_fun(path = path, sheet = sheet, limits = limits, shim = shim, :
## Expecting logical in FR1447 / R1447C174: got a date
## Warning in read_fun(path = path, sheet = sheet, limits = limits, shim = shim, :
## Expecting logical in FR1448 / R1448C174: got a date
## Warning in read_fun(path = path, sheet = sheet, limits = limits, shim = shim, :
## Expecting logical in FS1448 / R1448C175: got a date
## Warning in read_fun(path = path, sheet = sheet, limits = limits, shim = shim, :
## Expecting logical in FR1877 / R1877C174: got a date
dataA[1:10, 1:25]## regiao rebanho registro dparto diaslac l305 l365 lreal
## 1 NORTE PIONEIRO 46830 105498D 2015-04-10 302 3818.80 0.00 3818.80
## 2 NORTE PIONEIRO 46830 106522D 2014-08-12 304 3498.20 0.00 3498.20
## 3 NORTE PIONEIRO 46830 135251-C 2015-03-10 314 6122.89 6247.09 6247.09
## 4 NORTE PIONEIRO 46830 135254P 2014-09-03 379 3338.05 3574.60 3633.40
## 5 NORTE PIONEIRO 46830 137589-C 2014-07-19 354 5820.00 6517.45 6517.45
## 6 NORTE PIONEIRO 49101 147791C 2014-04-17 250 6388.56 0.00 6388.56
## 7 NORTE PIONEIRO 49101 147791C 2015-04-04 146 1542.18 0.00 1542.18
## 8 NORTE PIONEIRO 49101 147791C 2016-02-24 107 2103.90 0.00 2103.90
## 9 NORTE PIONEIRO 49101 147793C 2013-08-14 496 2578.80 2986.80 5895.30
## 10 NORTE PIONEIRO 49101 147793C 2014-08-17 128 2121.04 0.00 2121.04
## g305 g365 greal p305 p365 preal dclass pfinal ecc estat lpeito
## 1 161.87 0.00 161.87 135.44 0.00 135.44 <NA> NA NA NA NA
## 2 167.75 0.00 167.75 147.01 0.00 147.01 <NA> NA NA NA NA
## 3 201.34 205.62 205.62 200.07 204.13 204.13 <NA> NA NA NA NA
## 4 145.84 156.95 159.47 132.09 141.25 143.53 <NA> NA NA NA NA
## 5 234.86 264.49 264.49 217.02 244.00 244.00 <NA> NA NA NA NA
## 6 263.46 0.00 263.46 223.27 0.00 223.27 <NA> NA NA NA NA
## 7 61.06 0.00 61.06 57.46 0.00 57.46 <NA> NA NA NA NA
## 8 87.48 0.00 87.48 68.44 0.00 68.44 <NA> NA NA NA NA
## 9 84.75 98.43 194.87 86.56 100.54 193.27 <NA> NA NA NA NA
## 10 78.77 0.00 78.77 71.87 0.00 71.87 <NA> NA NA NA NA
## pcorpo ang flombar qossea fleiteira controle1
## 1 NA NA NA NA NA 2016-02-02
## 2 NA NA NA NA NA 2015-06-08
## 3 NA NA NA NA NA 2016-01-04
## 4 NA NA NA NA NA 2015-08-08
## 5 NA NA NA NA NA 2015-07-06
## 6 NA NA NA NA NA 2014-12-11
## 7 NA NA NA NA NA 2015-08-14
## 8 NA NA NA NA NA 2016-05-18
## 9 NA NA NA NA NA 2014-02-03
## 10 NA NA NA NA NA 2014-12-11
/* APAGAR INFORMAÇÕES QUE NÃO SERÃO UTILIZADAS */
DATA B; SET A;
DROP REGIAO L365 LREAL G365 GREAL P365 PREAL DCLASS PFINAL ECC ESTAT LPEITO PCORPO ANG FLOMBAR QOSSEA FLEITEIRA DENCERRAMENTO DIASLAC F23
CONTROLE5 CONTROLE6 CONTROLE7 CONTROLE8 CONTROLE9 CONTROLE10 CONTROLE11 CONTROLE12 CONTROLE13 CONTROLE14 CONTROLE15
CONTROLE16 CONTROLE17 CONTROLE18 CONTROLE19 CONTROLE20
LCONTROLE1 LCONTROLE2 LCONTROLE3 LCONTROLE4 LCONTROLE5 LCONTROLE6 LCONTROLE7 LCONTROLE8 LCONTROLE9 LCONTROLE10 LCONTROLE11 LCONTROLE12 LCONTROLE13 LCONTROLE14 LCONTROLE15
LCONTROLE16 LCONTROLE17 LCONTROLE18 LCONTROLE19 LCONTROLE20
GCONTROLE1 GCONTROLE2 GCONTROLE3 GCONTROLE4 GCONTROLE5 GCONTROLE6 GCONTROLE7 GCONTROLE8 GCONTROLE9 GCONTROLE10 GCONTROLE11 GCONTROLE12 GCONTROLE13 GCONTROLE14 GCONTROLE15
GCONTROLE16 GCONTROLE17 GCONTROLE18 GCONTROLE19 GCONTROLE20
PCONTROLE1 PCONTROLE2 PCONTROLE3 PCONTROLE4 PCONTROLE5 PCONTROLE6 PCONTROLE7 PCONTROLE8 PCONTROLE9 PCONTROLE10 PCONTROLE11 PCONTROLE12 PCONTROLE13 PCONTROLE14 PCONTROLE15
PCONTROLE16 PCONTROLE17 PCONTROLE18 PCONTROLE19 PCONTROLE20
LACCONTROLE1 LACCONTROLE2 LACCONTROLE3 LACCONTROLE4 LACCONTROLE5 LACCONTROLE6 LACCONTROLE7 LACCONTROLE8 LACCONTROLE9 LACCONTROLE10 LACCONTROLE11 LACCONTROLE12 LACCONTROLE13
LACCONTROLE14 LACCONTROLE15 LACCONTROLE16 LACCONTROLE17 LACCONTROLE18 LACCONTROLE19 LACCONTROLE20
SOLCONTROLE1 SOLCONTROLE2 SOLCONTROLE3 SOLCONTROLE4 SOLCONTROLE5 SOLCONTROLE6 SOLCONTROLE7 SOLCONTROLE8 SOLCONTROLE9 SOLCONTROLE10 SOLCONTROLE11 SOLCONTROLE12 SOLCONTROLE13
SOLCONTROLE14 SOLCONTROLE15 SOLCONTROLE16 SOLCONTROLE17 SOLCONTROLE18 SOLCONTROLE19 SOLCONTROLE20
CCSCONTROLE1 CCSCONTROLE2 CCSCONTROLE3 CCSCONTROLE4 CCSCONTROLE5 CCSCONTROLE6 CCSCONTROLE7 CCSCONTROLE8 CCSCONTROLE9 CCSCONTROLE10 CCSCONTROLE11 CCSCONTROLE12 CCSCONTROLE13
CCSCONTROLE14 CCSCONTROLE15 CCSCONTROLE16 CCSCONTROLE17 CCSCONTROLE18 CCSCONTROLE19 CCSCONTROLE20
COBERTURA1 COBERTURA2 COBERTURA3 COBERTURA4 COBERTURA5 COBERTURA6 COBERTURA7 COBERTURA8 COBERTURA9 COBERTURA10 COBERTURA11 COBERTURA12 COBERTURA13 COBERTURA14 COBERTURA15;
RUN; /* 2997 observations and 16 variables */
names(dataA)## [1] "regiao" "rebanho" "registro" "dparto"
## [5] "diaslac" "l305" "l365" "lreal"
## [9] "g305" "g365" "greal" "p305"
## [13] "p365" "preal" "dclass" "pfinal"
## [17] "ecc" "estat" "lpeito" "pcorpo"
## [21] "ang" "flombar" "qossea" "fleiteira"
## [25] "controle1" "lcontrole1" "gcontrole1" "pcontrole1"
## [29] "laccontrole1" "solcontrole1" "ccscontrole1" "controle2"
## [33] "lcontrole2" "gcontrole2" "pcontrole2" "laccontrole2"
## [37] "solcontrole2" "ccscontrole2" "controle3" "lcontrole3"
## [41] "gcontrole3" "pcontrole3" "laccontrole3" "solcontrole3"
## [45] "ccscontrole3" "controle4" "lcontrole4" "gcontrole4"
## [49] "pcontrole4" "laccontrole4" "solcontrole4" "ccscontrole4"
## [53] "controle5" "lcontrole5" "gcontrole5" "pcontrole5"
## [57] "laccontrole5" "solcontrole5" "ccscontrole5" "controle6"
## [61] "lcontrole6" "gcontrole6" "pcontrole6" "laccontrole6"
## [65] "solcontrole6" "ccscontrole6" "controle7" "lcontrole7"
## [69] "gcontrole7" "pcontrole7" "laccontrole7" "solcontrole7"
## [73] "ccscontrole7" "controle8" "lcontrole8" "gcontrole8"
## [77] "pcontrole8" "laccontrole8" "solcontrole8" "ccscontrole8"
## [81] "controle9" "lcontrole9" "gcontrole9" "pcontrole9"
## [85] "laccontrole9" "solcontrole9" "ccscontrole9" "controle10"
## [89] "lcontrole10" "gcontrole10" "pcontrole10" "laccontrole10"
## [93] "solcontrole10" "ccscontrole10" "controle11" "lcontrole11"
## [97] "gcontrole11" "pcontrole11" "laccontrole11" "solcontrole11"
## [101] "ccscontrole11" "controle12" "lcontrole12" "gcontrole12"
## [105] "pcontrole12" "laccontrole12" "solcontrole12" "ccscontrole12"
## [109] "controle13" "lcontrole13" "gcontrole13" "pcontrole13"
## [113] "laccontrole13" "solcontrole13" "ccscontrole13" "controle14"
## [117] "lcontrole14" "gcontrole14" "pcontrole14" "laccontrole14"
## [121] "solcontrole14" "ccscontrole14" "controle15" "lcontrole15"
## [125] "gcontrole15" "pcontrole15" "laccontrole15" "solcontrole15"
## [129] "ccscontrole15" "controle16" "lcontrole16" "gcontrole16"
## [133] "pcontrole16" "laccontrole16" "solcontrole16" "ccscontrole16"
## [137] "controle17" "lcontrole17" "gcontrole17" "pcontrole17"
## [141] "laccontrole17" "solcontrole17" "ccscontrole17" "controle18"
## [145] "lcontrole18" "gcontrole18" "pcontrole18" "laccontrole18"
## [149] "solcontrole18" "ccscontrole18" "controle19" "lcontrole19"
## [153] "gcontrole19" "pcontrole19" "laccontrole19" "solcontrole19"
## [157] "ccscontrole19" "controle20" "lcontrole20" "gcontrole20"
## [161] "pcontrole20" "laccontrole20" "solcontrole20" "ccscontrole20"
## [165] "ordemparto" "dencerramento" "diaslact" "cobertura1"
## [169] "cobertura2" "cobertura3" "cobertura4" "cobertura5"
## [173] "cobertura6" "cobertura7" "cobertura8" "cobertura9"
## [177] "cobertura10" "cobertura11" "cobertura12" "cobertura13"
## [181] "cobertura14" "cobertura15" "F23" "dnasc"
## [185] "gs" "rgpai" "rgmae"
# Drop the columns that will not be used. The numbered families are generated
# with paste0() instead of being typed one by one:
#   - controle5..controle20 (controle1..controle4 are kept)
#   - l/g/p/lac/sol/ccs + "controle" + 1..20
#   - cobertura1..cobertura15
dataB <- dataA[, !(names(dataA) %in% c(
  "regiao", "l365", "lreal", "g365", "greal", "p365", "preal", "dclass",
  "pfinal", "ecc", "estat", "lpeito", "pcorpo", "ang", "flombar", "qossea",
  "fleiteira", "dencerramento", "diaslac", "F23",
  paste0("controle", 5:20),
  paste0(rep(c("l", "g", "p", "lac", "sol", "ccs"), each = 20), "controle", 1:20),
  paste0("cobertura", 1:15)
))]
dim(dataB)## [1] 2997 16
/* ORDENAR POR REGISTRO E DATA DE PARTO */
/* COMANDO ARRAY PARA ORGANIZAR INFORMAÇÕES EM LINHAS */
PROC SORT DATA=B; BY REGISTRO DPARTO; RUN;
DATA C;
ARRAY CONTROLE{4} CONTROLE1-CONTROLE4;
ARRAY LCONTROLE{4} LCONTROLE1-LCONTROLE4;
ARRAY GCONTROLE{4} GCONTROLE1-GCONTROLE4;
ARRAY PCONTROLE{4} PCONTROLE1-PCONTROLE4;
ARRAY LACCONTROLE{4} LACCONTROLE1-LACCONTROLE4;
ARRAY SOLCONTROLE{4} SOLCONTROLE1-SOLCONTROLE4;
ARRAY CCSCONTROLE{4} CCSCONTROLE1-CCSCONTROLE4;
SET B;
DO I=1 TO 4;
DCONT=CONTROLE(I);
LCONT=LCONTROLE(I);
GCONT=GCONTROLE(I);
PCONT=PCONTROLE(I);
LACCONT=LACCONTROLE(I);
SOLCONT=SOLCONTROLE(I);
CCSCONT=CCSCONTROLE(I);
DROP CONTROLE1-CONTROLE5 I;
DROP LCONTROLE1-LCONTROLE4;
DROP GCONTROLE1-GCONTROLE4;
DROP PCONTROLE1-PCONTROLE4;
DROP LACCONTROLE1-LACCONTROLE4;
DROP SOLCONTROLE1-SOLCONTROLE4;
DROP CCSCONTROLE1-CCSCONTROLE4;
OUTPUT; END; RUN;
PROC SORT; BY DPARTO REGISTRO DCONT; RUN;
/* 11.988 observations and 19 variables */
/* WARNING: Not all variables in the list CONTROLE1-CONTROLE5 were found. */
# Sort by registration and calving date (counterpart of PROC SORT DATA=B; BY REGISTRO DPARTO).
dataB <- dataB[order(dataB$registro, dataB$dparto), ]
# Wide -> long: controle1..controle4 are stacked into a single column named DCONT,
# giving one output row per input row and control column. reshape() also adds the
# bookkeeping columns `time` and `id` seen in the printed result.
dataC <- reshape(dataB, varying = list(c("controle1", "controle2", "controle3", "controle4")), direction = "long", v.names = "DCONT")
# Order by calving date, registration and control date; na.last = FALSE puts rows
# with a missing sort value first.
dataC <- dataC[order(dataC$dparto, dataC$registro, dataC$DCONT, na.last = FALSE), ]
head(dataC)## rebanho registro dparto l305 g305 p305 ordemparto diaslact
## 770.4 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 770.3 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 770.2 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 770.1 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 323.4 35264 BR1508184 2010-01-06 4779.67 107.72 160.09 2 178
## 323.3 35264 BR1508184 2010-01-06 4779.67 107.72 160.09 2 178
## dnasc gs rgpai rgmae time DCONT id
## 770.4 2007-05-06 15/16 <NA> <NA> 4 2010-09-28 770
## 770.3 2007-05-06 15/16 <NA> <NA> 3 2010-10-25 770
## 770.2 2007-05-06 15/16 <NA> <NA> 2 2010-11-24 770
## 770.1 2007-05-06 15/16 <NA> <NA> 1 2010-12-22 770
## 323.4 2006-09-08 07-ago <NA> <NA> 4 <NA> 323
## 323.3 2006-09-08 07-ago <NA> <NA> 3 2010-03-05 323
/* O SAS RECONHECEU QUE EXISTEM INFORMAÇÕES FALTANTES - WARNING */
/* APAGAR INFORMAÇÕES FALTANTES */
DATA C1; SET C;
IF DCONT = "." THEN DELETE;
RUN; /* 10.894 */
dataC1 <- dataC[!is.na(dataC$DCONT), ]
nrow(dataC1)## [1] 10894
head(dataC1)## rebanho registro dparto l305 g305 p305 ordemparto diaslact
## 770.4 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 770.3 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 770.2 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 770.1 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 323.3 35264 BR1508184 2010-01-06 4779.67 107.72 160.09 2 178
## 323.2 35264 BR1508184 2010-01-06 4779.67 107.72 160.09 2 178
## dnasc gs rgpai rgmae time DCONT id
## 770.4 2007-05-06 15/16 <NA> <NA> 4 2010-09-28 770
## 770.3 2007-05-06 15/16 <NA> <NA> 3 2010-10-25 770
## 770.2 2007-05-06 15/16 <NA> <NA> 2 2010-11-24 770
## 770.1 2007-05-06 15/16 <NA> <NA> 1 2010-12-22 770
## 323.3 2006-09-08 07-ago <NA> <NA> 3 2010-03-05 323
## 323.2 2006-09-08 07-ago <NA> <NA> 2 2010-04-09 323
/* MANTER APENAS AS COLUNAS COM INFORMAÇÕES QUE ME INTERESSAM */
DATA C2; SET C1;
KEEP REBANHO REGISTRO DPARTO L305 G305 P305 ORDEMPARTO DIASLACT DNASC GS RGPAI RGMAE DCONT;
RUN; /* 10.894 observations and 13 variables */
dataC2 <- dataC1[, c("rebanho", "registro", "dparto", "l305", "g305", "p305", "ordemparto", "diaslact", "dnasc", "gs", "rgpai", "rgmae", "DCONT")]
dim(dataC2)## [1] 10894 13
head(dataC2)## rebanho registro dparto l305 g305 p305 ordemparto diaslact
## 770.4 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 770.3 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 770.2 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 770.1 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 323.3 35264 BR1508184 2010-01-06 4779.67 107.72 160.09 2 178
## 323.2 35264 BR1508184 2010-01-06 4779.67 107.72 160.09 2 178
## dnasc gs rgpai rgmae DCONT
## 770.4 2007-05-06 15/16 <NA> <NA> 2010-09-28
## 770.3 2007-05-06 15/16 <NA> <NA> 2010-10-25
## 770.2 2007-05-06 15/16 <NA> <NA> 2010-11-24
## 770.1 2007-05-06 15/16 <NA> <NA> 2010-12-22
## 323.3 2006-09-08 07-ago <NA> <NA> 2010-03-05
## 323.2 2006-09-08 07-ago <NA> <NA> 2010-04-09
/* MANTER APENAS AS INFORMAÇÕES DO PRIMEIRO CONTROLE */
/* aqui eu não consigo ter certeza que o primeiro controle o é de fato, sei que é o controle com a data mais antiga */
DATA C3; SET C2;
BY DPARTO REGISTRO ;
IF FIRST.REGISTRO THEN OUTPUT;
RUN; /* 2.960 observations and 13 variables */
# Sort by calving date and registration, then keep only the first row found for
# each registro/dparto pair (counterpart of IF FIRST.REGISTRO THEN OUTPUT).
dataC3 <- dataC2[with(dataC2, order(dparto, registro)), ]
dataC3 <- dataC3[!duplicated(dataC3[, c("registro", "dparto")]), ]
dim(dataC3)## [1] 2960 13
head(dataC3)## rebanho registro dparto l305 g305 p305 ordemparto diaslact
## 770.4 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 323.3 35264 BR1508184 2010-01-06 4779.67 107.72 160.09 2 178
## 1840.4 34112 SR786661 2010-01-06 7925.10 284.93 262.48 1 438
## 238.4 34112 BR1415717 2010-01-07 5418.35 172.94 147.45 4 353
## 279.4 37542 BR1471811 2010-01-08 6168.26 198.30 227.33 2 348
## 385.4 37542 BR1517251 2010-01-08 9630.03 412.19 361.06 3 395
## dnasc gs rgpai rgmae DCONT
## 770.4 2007-05-06 15/16 <NA> <NA> 2010-09-28
## 323.3 2006-09-08 07-ago <NA> <NA> 2010-03-05
## 1840.4 2006-06-26 01-fev <NA> <NA> 2010-11-28
## 238.4 2003-06-18 PCOD <NA> <NA> 2010-08-27
## 279.4 2006-08-07 GC-10 AX116954 BR1363254 2010-08-24
## 385.4 2004-07-10 PCOD <NA> <NA> 2010-10-22
/* COMANDO PARA CONTAR NÚMERO DE PAIS E MÃES - DENTRE OS ANIMAIS LISTADOS NAS COLUNAS REGISTRO E PAIS/MÃES */
/* Pais */
DATA AN; SET C3;
REG=REGISTRO;
KEEP REG; PROC SORT; BY REG;
DATA PAI; SET C3;
REG=RGPAI;
KEEP REG; PROC SORT NODUPKEY; BY REG;
DATA AN_PAI; MERGE AN (in=x) PAI (in=y);
BY REG; IF X AND Y;
RUN; /* 0 PAIS */
/* Mães */
DATA AN; SET C3;
REG=REGISTRO;
KEEP REG; PROC SORT; BY REG;
DATA MAE; SET C3;
REG=RGMAE;
KEEP REG; PROC SORT NODUPKEY; BY REG;
DATA AN_MAE; MERGE AN (in=x) MAE (in=y);
BY REG; IF X AND Y;
RUN; /* 471 MAES */
# Sires: records whose registration also appears in the sire column (rgpai)
AN <- data.frame(reg = dataC3[["registro"]])
PAI <- data.frame(reg = unique(dataC3[["rgpai"]][!is.na(dataC3[["rgpai"]])]))
AN_PAI <- merge(x = AN, y = PAI, by = "reg")
nrow(AN_PAI)## [1] 0
# Dams: records whose registration also appears in the dam column (rgmae)
AN <- data.frame(reg = dataC3[["registro"]])
MAE <- data.frame(reg = unique(dataC3[["rgmae"]][!is.na(dataC3[["rgmae"]])]))
AN_MAE <- merge(x = AN, y = MAE, by = "reg")
nrow(AN_MAE)## [1] 471
# Se for so pra saber o numero (nao precisar dos objetos criados), podemos fazer em uma linha de codigo:
sum(dataC3$registro %in% unique(dataC3$rgmae))## [1] 471
DATA C4; SET C3;
PROC FREQ; TABLE ORDEMPARTO;
RUN;
/* 1 1221 41.25 1221 41.25
2 843 28.48 2064 69.73
3 503 16.99 2567 86.72
4 242 8.18 2809 94.90
5 107 3.61 2916 98.51
6 28 0.95 2944 99.46
7 12 0.41 2956 99.86
8 3 0.10 2959 99.97
10 1 0.03 2960 100.00
*/
table(dataC3$ordemparto)##
## 1 2 3 4 5 6 7 8 10
## 1221 843 503 242 107 28 12 3 1
prop.table(table(dataC3$ordemparto))##
## 1 2 3 4 5 6
## 0.4125000000 0.2847972973 0.1699324324 0.0817567568 0.0361486486 0.0094594595
## 7 8 10
## 0.0040540541 0.0010135135 0.0003378378
cumsum(table(dataC3$ordemparto))## 1 2 3 4 5 6 7 8 10
## 1221 2064 2567 2809 2916 2944 2956 2959 2960
cumsum(prop.table(table(dataC3$ordemparto)))## 1 2 3 4 5 6 7 8
## 0.4125000 0.6972973 0.8672297 0.9489865 0.9851351 0.9945946 0.9986486 0.9996622
## 10
## 1.0000000
DATA C5; SET C3;
IF ORDEMPARTO NE 1 THEN DELETE;
RUN; /* 1.221 observations and 13 variables */
# Keep only the records with ordemparto equal to 1.
# which() discards rows where the comparison is NA; a bare logical index would
# return an all-NA row for each of them, while the SAS step
# (IF ORDEMPARTO NE 1 THEN DELETE) removes records with a missing ORDEMPARTO.
dataC5 <- dataC3[which(dataC3$ordemparto == 1), ]
dim(dataC5)## [1] 1221 13
head(dataC5)## rebanho registro dparto l305 g305 p305 ordemparto diaslact
## 770.4 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 1840.4 34112 SR786661 2010-01-06 7925.10 284.93 262.48 1 438
## 1035.4 37542 BR1588956 2010-01-13 3667.89 143.38 117.84 1 453
## 1819.4 48904 SR785532 2010-01-15 6412.96 302.74 219.18 1 319
## 1895.4 37542 SR793212 2010-01-17 2379.55 112.37 84.18 1 292
## 1942.4 37542 SR793270 2010-01-20 3964.09 181.09 137.45 1 336
## dnasc gs rgpai rgmae DCONT
## 770.4 2007-05-06 15/16 <NA> <NA> 2010-09-28
## 1840.4 2006-06-26 01-fev <NA> <NA> 2010-11-28
## 1035.4 2007-12-12 PCOD <NA> <NA> 2010-10-22
## 1819.4 2005-05-03 01-fev <NA> <NA> 2010-08-24
## 1895.4 2007-06-06 01-fev <NA> <NA> 2010-07-15
## 1942.4 2007-07-19 01-fev <NA> <NA> 2010-08-24
DATA C6; SET C5;
IDADE=ROUND((DPARTO-DNASC)/30.5); *idade em meses;
IF IDADE <=20 THEN DELETE;
IF IDADE >= 40 THEN DELETE;
RUN;
/* 842 observations and 14 variables */
# Age at calving in months: days between birth (dnasc) and calving (dparto),
# divided by 30.5 and rounded to the nearest integer.
dataC5$idade <- as.numeric(round((as.Date(dataC5$dparto) - as.Date(dataC5$dnasc)) / 30.5))
# Keep ages strictly between 20 and 40 months. which() drops rows whose idade is
# NA (e.g. a missing date) instead of returning all-NA rows for them; the SAS
# step also deletes those records, because a missing value satisfies IDADE <= 20.
dataC6 <- dataC5[which(dataC5$idade > 20 & dataC5$idade < 40), ]
dim(dataC6)## [1] 842 14
DATA C8; SET C6;
IF L305 > (6312.26 + (3*2436.21)) THEN DELETE;
IF L305 < (6312.26 - (3*2436.21)) THEN DELETE;
IF G305 > (243.16 + (3*91.09)) THEN DELETE;
IF G305 < (243.16 - (3*91.09)) THEN DELETE;
IF P305 > (205.57 + (3*77.68)) THEN DELETE;
IF P305 < (205.57 - (3*77.68)) THEN DELETE;
RUN; /* 842 observations and 14 variables */
# Outlier filter: a record is kept only when l305, g305 AND p305 are all within
# mean +/- 3 standard deviations (limits computed from dataC6 itself).
# The SAS step deletes a record as soon as any one of the three traits is outside
# its limits, so the three range tests have to be combined with `&`. Combining
# them with `|` keeps a record whenever a single trait is in range.
dentro3dp <- function(x) {
  # TRUE where x lies inside [mean(x) - 3 * sd(x), mean(x) + 3 * sd(x)]
  x >= mean(x) - 3 * sd(x) & x <= mean(x) + 3 * sd(x)
}
dataC8 <- dataC6[dentro3dp(dataC6$l305) & dentro3dp(dataC6$g305) & dentro3dp(dataC6$p305), ]
dim(dataC8)## [1] 842 14
head(dataC8)## rebanho registro dparto l305 g305 p305 ordemparto diaslact
## 770.4 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 1035.4 37542 BR1588956 2010-01-13 3667.89 143.38 117.84 1 453
## 1895.4 37542 SR793212 2010-01-17 2379.55 112.37 84.18 1 292
## 1942.4 37542 SR793270 2010-01-20 3964.09 181.09 137.45 1 336
## 1590.4 37542 BX370145 2010-02-03 10659.26 357.30 343.17 1 345
## 1897.4 37542 SR793217 2010-02-07 3113.65 160.92 109.44 1 271
## dnasc gs rgpai rgmae DCONT idade
## 770.4 2007-05-06 15/16 <NA> <NA> 2010-09-28 32
## 1035.4 2007-12-12 PCOD <NA> <NA> 2010-10-22 25
## 1895.4 2007-06-06 01-fev <NA> <NA> 2010-07-15 31
## 1942.4 2007-07-19 01-fev <NA> <NA> 2010-08-24 30
## 1590.4 2007-04-27 PO AX116371 BX307244 2010-09-28 33
## 1897.4 2007-06-06 01-fev <NA> <NA> 2010-07-15 32
DATA C9; SET C8;
DIAP=DAY(DPARTO);
MESP=MONTH(DPARTO);
ANOP=YEAR(DPARTO);
RUN; /* 842 observations and 17 variables */
# Day, month and year of calving, formatted as text with "%d", "%m" and "%Y"
# and appended to dataC8 in that order.
dataC8[c("diap", "mesp", "anop")] <- lapply(
  c("%d", "%m", "%Y"),
  function(fmt) format(as.Date(dataC8$dparto), fmt)
)
# dataC9 is a copy of dataC8 taken after the three columns were added.
dataC9 <- dataC8
head(dataC9)## rebanho registro dparto l305 g305 p305 ordemparto diaslact
## 770.4 48904 BR1559434 2010-01-03 6012.85 269.13 186.77 1 368
## 1035.4 37542 BR1588956 2010-01-13 3667.89 143.38 117.84 1 453
## 1895.4 37542 SR793212 2010-01-17 2379.55 112.37 84.18 1 292
## 1942.4 37542 SR793270 2010-01-20 3964.09 181.09 137.45 1 336
## 1590.4 37542 BX370145 2010-02-03 10659.26 357.30 343.17 1 345
## 1897.4 37542 SR793217 2010-02-07 3113.65 160.92 109.44 1 271
## dnasc gs rgpai rgmae DCONT idade diap mesp anop
## 770.4 2007-05-06 15/16 <NA> <NA> 2010-09-28 32 03 01 2010
## 1035.4 2007-12-12 PCOD <NA> <NA> 2010-10-22 25 13 01 2010
## 1895.4 2007-06-06 01-fev <NA> <NA> 2010-07-15 31 17 01 2010
## 1942.4 2007-07-19 01-fev <NA> <NA> 2010-08-24 30 20 01 2010
## 1590.4 2007-04-27 PO AX116371 BX307244 2010-09-28 33 03 02 2010
## 1897.4 2007-06-06 01-fev <NA> <NA> 2010-07-15 32 07 02 2010
DATA D; SET C8;
PROC PLOT; PLOT IDADE*ANOP;
RUN;
plot(idade ~ anop, data = dataC8, pch = 16, col = rgb(0, 0, 0, alpha = 0.1))# pra quem quer mesmo letras..
# Character scatter plot in the style of SAS PROC PLOT.
#
# Each distinct (x, y) pair is drawn as a letter that encodes how many
# observations fall on it: "A" = 1, "B" = 2, and so on. Counts of 26 or more are
# all drawn as "Z"; without this cap LETTERS[] returns NA for any count above 26.
#
# Arguments:
#   x, y  vectors of the same length whose values can be converted to numbers
#   ...   further arguments passed on to plot()
# Value:
#   Invisibly, the data frame of plotted points with columns x, y, Freq and
#   label (the letter drawn).
procPlot <- function(x, y, ...) {
  paraPlot <- as.data.frame(table(x, y))
  paraPlot <- paraPlot[paraPlot$Freq > 0, ]
  # table() stores the values as factor levels; go through character to get the
  # original numbers back rather than the internal level codes.
  xNum <- as.numeric(as.character(paraPlot$x))
  yNum <- as.numeric(as.character(paraPlot$y))
  paraPlot$label <- LETTERS[pmin(paraPlot$Freq, length(LETTERS))]
  # Empty frame first (type = "n"), then the letters at the point positions.
  plot(xNum, yNum, type = "n", xlab = "", ylab = "", ...)
  text(xNum, yNum, labels = paraPlot$label)
  invisible(paraPlot)
}
procPlot(dataC8$anop, dataC8$idade)
DATA D1; SET C8;
PROC GPLOT; PLOT IDADE*ANOP;
RUN;
plot(dataC8$idade ~ dataC8$anop)
DATA D2; SET C8;
PROC CORR; VAR L305 IDADE;
TITLE 'CORRELAÇAO ENTRE L305 E IDADE DE VACAS HOLANDESAS';
RUN;
cor.test(dataC8$l305, dataC8$idade)##
## Pearson's product-moment correlation
##
## data: dataC8$l305 and dataC8$idade
## t = 1.9596, df = 840, p-value = 0.05037
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.0001035736 0.1344090780
## sample estimates:
## cor
## 0.06745929
/* Spearman rank correlation between L305 and IDADE (age in months). */
/* The PROC CORR option is spelled SPEARMAN. */
DATA D3; SET C8;
PROC CORR SPEARMAN; VAR L305 IDADE;
TITLE 'CORRELAÇAO ENTRE L305 E IDADE DE VACAS HOLANDESAS';
RUN;
cor.test(dataC8$l305, dataC8$idade, method = "spearman")## Warning in cor.test.default(dataC8$l305, dataC8$idade, method = "spearman"):
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: dataC8$l305 and dataC8$idade
## S = 90962000, p-value = 0.01283
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.08573061
/* LEITE X DIAS EM LACTAÇÃO */
PROC REG DATA = C8;
MODEL L305 = DIASLACT;
PLOT L305 * DIASLACT = 'SIMBOL';
RUN;
lm1 <- lm(l305 ~ diaslact, data = dataC8)
anova(lm1)## Analysis of Variance Table
##
## Response: l305
## Df Sum Sq Mean Sq F value Pr(>F)
## diaslact 1 841968379 841968379 170.45 < 2.2e-16 ***
## Residuals 840 4149449073 4939820
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)##
## Call:
## lm(formula = l305 ~ diaslact, data = dataC8)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11214.6 -1394.4 349.3 1556.3 5168.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4294.8843 172.4650 24.90 <2e-16 ***
## diaslact 5.8274 0.4464 13.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2223 on 840 degrees of freedom
## Multiple R-squared: 0.1687, Adjusted R-squared: 0.1677
## F-statistic: 170.4 on 1 and 840 DF, p-value: < 2.2e-16
plot(l305 ~ diaslact, data = dataC8, col = "blue", pch = 3)
abline(lm1)
DATA E;
INPUT TRAT $ DOSE @@;
CARDS;
1 24.1 2 15.5 3 19.3 4 18.7
1 23.5 2 19.1 3 19.2 4 21.2
1 20.7 2 16.6 3 17.7 4 18.7
1 23.5 2 19.1 3 21.7 4 21.2
1 22.7 2 18.2 3 18.6 4 19.2
;
RUN; /* 20 observations and 2 variables */
# First build the trat and dose vectors.
# Treatment codes 1 to 4, five observations each.
trat <- rep(c(1, 2, 3, 4), each = 5)
# Doses typed in the same layout as the SAS CARDS block (one column per
# treatment), then read column by column so that the five values of each
# treatment are contiguous and line up with trat.
dose <- as.vector(matrix(c(
  24.1, 15.5, 19.3, 18.7,
  23.5, 19.1, 19.2, 21.2,
  20.7, 16.6, 17.7, 18.7,
  23.5, 19.1, 21.7, 21.2,
  22.7, 18.2, 18.6, 19.2
), ncol = 4, byrow = TRUE))
# juntar os vetores no data frame E
dataE <- data.frame(trat, dose)## trat dose
## 1 1 24.1
## 2 1 23.5
## 3 1 20.7
## 4 1 23.5
## 5 1 22.7
## 6 2 15.5
## 7 2 19.1
## 8 2 16.6
## 9 2 19.1
## 10 2 18.2
## 11 3 19.3
## 12 3 19.2
## 13 3 17.7
## 14 3 21.7
## 15 3 18.6
## 16 4 18.7
## 17 4 21.2
## 18 4 18.7
## 19 4 21.2
## 20 4 19.2
DATA E1; SET E;
PROC GLM;
CLASS TRAT;
MODEL DOSE=TRAT;
MEANS TRAT/TUKEY;
RUN;
dataE1 <- aov(dose ~ factor(trat), data = dataE) # fazer ANOVA, primeiramente
summary(dataE1)## Df Sum Sq Mean Sq F value Pr(>F)
## factor(trat) 3 71.04 23.679 11.56 0.00028 ***
## Residuals 16 32.78 2.049
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qtukey(p = 0.95, nmeans = 4, df = 16) # amplitude total studentizada## [1] 4.046093
4.0461*sqrt(2.0487/5) # diferença mínima significativa## [1] 2.589947
aggregate(dose ~ trat, data = dataE, FUN = mean) # médias por tratamento## trat dose
## 1 1 22.9
## 2 2 17.7
## 3 3 19.3
## 4 4 19.8
TukeyHSD(dataE1) # teste Tukey, sem letrinhas...## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = dose ~ factor(trat), data = dataE)
##
## $`factor(trat)`
## diff lwr upr p adj
## 2-1 -5.2 -7.7899737 -2.6100263 0.0001605
## 3-1 -3.6 -6.1899737 -1.0100263 0.0053714
## 4-1 -3.1 -5.6899737 -0.5100263 0.0164686
## 3-2 1.6 -0.9899737 4.1899737 0.3238338
## 4-2 2.1 -0.4899737 4.6899737 0.1348926
## 4-3 0.5 -2.0899737 3.0899737 0.9445650
# ..mas com gráfico
plot(TukeyHSD(dataE1))
PROC EXPORT DATA= WORK.C8
OUTFILE= "F:\2017\Doutorado\Disciplinas\R\data_final.xlsx"
DBMS=EXCELCS REPLACE;
SHEET="data_final";
RUN;
write.csv(dataC8, file = "data_final.csv", row.names = FALSE) # ou write.csv2